Libraries
#library(menbayes)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.1.0
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
Load Data
alt_g <- readRDS("~/thesis/menbayes_analysis/output/sims/alt_sims_g.RDS")
alt_gl <- readRDS("~/thesis/menbayes_analysis/output/sims/alt_sims_gl.RDS")
null_g <- read.csv("~/thesis/menbayes_analysis/output/sims/null_sims_g.csv")
null_gl <- read.csv("~/thesis/menbayes_analysis/output/sims/null_sims_gl.csv")
Alternative Simulations
In this section, we run simulations under the alternative of segregation distortion to evaluate the relative strength of evidence for the hypotheses listed in Section 2.4. We simulated \(n \in {10, 100, 1000}\) individuals under the genotype frequency distributions \(q = (\frac{1}{5}, \frac{1}{5}, \frac{1}{5}, \frac{1}{5}, \frac{1}{5})\) and \(q = (\frac{2}{5}, \frac{1}{5}, \frac{0}{5}, \frac{1}{5}, \frac{2}{5})\). We repeated simulations of individuals under all combinations of \(n\) and \(q\) 100 times. Additionally, we simulated genotype likelihoods using the model of (Gerard et. al., 2018) with a read depth of 10 or 100 using the procedure outlined in Section 3.1.
Alternative Simulations when Genotypes are Known
## Boxplots
alt_g <- alt_g %>%
mutate(n = as.factor(n),
genofreq = factor(genofreq, levels=c("c(0.2, 0.2, 0.2, 0.2, 0.2)","c(0.4, 0.1, 0, 0.1, 0.4)")))
alt_g_bp <- ggplot(data = alt_g, mapping = aes(x = n, y = logbf, color = n)) +
geom_boxplot() +
facet_wrap(~genofreq, scales = "free") +
xlab("Sample Size") +
ylab("Log Bayes Factor") +
theme_bw() +
theme(strip.background = element_rect(fill = "white")) +
geom_hline(yintercept = 0, lty = 2)
ggplotly(alt_g_bp)
Alternative Simulations using Genotype Likelihoods
## Boxplots
alt_gl <- alt_gl %>%
mutate(n = as.factor(n),
genofreq = factor(genofreq, levels=c("c(0.2, 0.2, 0.2, 0.2, 0.2)","c(0.4, 0.1, 0, 0.1, 0.4)")))
alt_gl_bp <- ggplot(data = alt_gl, mapping = aes(x = n, y = logbf, color = n)) +
geom_boxplot() +
facet_wrap(~genofreq) +
xlab("Sample Size") +
ylab("Log Bayes Factor") +
theme_bw() +
theme(strip.background = element_rect(fill = "white")) +
geom_hline(yintercept = 0, lty = 2)
ggplotly(alt_gl_bp)
Null Simulations
In this section, we simulated offspring genotypes from parental genotypes using the following \(G_1, G_2, n, \alpha,\) and \(\xi\):
\((G_1, G_2) \in \{(0, 1), (0, 2), (1, 1), (1, 2), (2, 2)\}\) \(n \in \{10, 100, 1000\}\) \(\alpha \in \{0, 1/12, 1/6\}\) \(\xi \in \{0, 1/6, 1/3\}\)
Null Simulations when Genotypes are Known
We simulated offspring genotypes given parental genotypes using the gamete probabilities from Table 2.2 and the offspring genotypes from Table 1.1. Genotypes were simulated given those offspring frequencies from a multinomial distribution (2.1).
## Q-Q Plot of the Chi-Square p-values
ggplot(null_g, aes(sample=chisq_pvalue)) +
stat_qq_line() +
stat_qq(size = 2, color = "hotpink") +
ggtitle("Uniform Q-Q Plot of Chi-Sq p-values (Null Sims, Genotypes Known)") +
xlab("Theoretical Quantiles") +
ylab("Sample Quantiles") +
theme_bw() +
theme(strip.background = element_rect(fill = "white"))
## Boxplots
null_g %>%
mutate(n = as.factor(n),
`Parent Genotypes` = paste0("(", p1, ",", p2, ")"),
alpha = paste0("alpha==", as.character(MASS::fractions(alpha))),
xi = paste0("xi==", as.character(MASS::fractions(xi))),
xi = parse_factor(xi)) %>%
ggplot(mapping = aes(x = n, y = logbf, color = `Parent Genotypes`)) +
geom_boxplot() +
facet_grid(alpha ~ xi, labeller = label_parsed) +
xlab("Sample Size") +
ylab("Log Bayes Factor") +
theme_bw() +
theme(strip.background = element_rect(fill = "white")) +
geom_hline(yintercept = 0, lty = 2)
Null Simulations using Genotype Likelihoods
## Q-Q Plot of the Chi Square values
ggplot(null_gl, aes(sample=chisq_pvalue)) +
stat_qq_line() +
stat_qq(size = 2, color = "hotpink") +
ggtitle("Uniform Q-Q Plot of Chi-Sq p-values (Null Sims, Genotype Likelihoods)") +
xlab("Theoretical Quantiles") +
ylab("Sample Quantiles") +
theme_bw() +
theme(strip.background = element_rect(fill = "white"))
## Boxplots
# Log Bayes Factor
null_gl %>%
mutate(n = as.factor(n),
alpha = paste0("alpha==", as.character(MASS::fractions(alpha))),
xi = paste0("xi==", as.character(MASS::fractions(xi))),
xi = parse_factor(xi)) %>%
ggplot(mapping = aes(x = n, y = logbf, color = n)) +
geom_boxplot() +
facet_grid(alpha ~ xi, labeller = label_parsed) +
xlab("Sample Size") +
ylab("Log Bayes Factor") +
theme_bw() +
theme(strip.background = element_rect(fill = "white")) +
geom_hline(yintercept = 0, lty = 2)
# Chi square Statistic
null_gl %>%
mutate(n = as.factor(n),
alpha = paste0("alpha==", as.character(MASS::fractions(alpha))),
xi = paste0("xi==", as.character(MASS::fractions(xi))),
xi = parse_factor(xi)) %>%
ggplot(mapping = aes(x = n, y = chisq_stat, color = n)) +
geom_boxplot() +
facet_grid(alpha ~ xi, labeller = label_parsed) +
xlab("Sample Size") +
ylab("Log Bayes Factor") +
theme_bw() +
theme(strip.background = element_rect(fill = "white")) +
geom_hline(yintercept = 0, lty = 2)
## Warning: Removed 14913 rows containing non-finite values (`stat_boxplot()`).
# Posterior Mean of alpha
null_gl %>%
mutate(n = as.factor(n),
alpha = paste0("alpha==", as.character(MASS::fractions(alpha))),
xi = paste0("xi==", as.character(MASS::fractions(xi))),
xi = parse_factor(xi)) %>%
ggplot(mapping = aes(x = n, y = pm_alpha, color = n)) +
geom_boxplot() +
facet_grid(alpha ~ xi, labeller = label_parsed) +
xlab("Sample Size") +
ylab("Posterior Mean of Alpha") +
theme_bw() +
theme(strip.background = element_rect(fill = "white")) +
geom_hline(yintercept = 0, lty = 2)
# Posterior Mean of xi
null_gl %>%
mutate(n = as.factor(n),
alpha = paste0("alpha==", as.character(MASS::fractions(alpha))),
xi = paste0("xi==", as.character(MASS::fractions(xi))),
xi = parse_factor(xi)) %>%
ggplot(mapping = aes(x = n, y = pm_xi, color = n)) +
geom_boxplot() +
facet_grid(alpha ~ xi, labeller = label_parsed) +
xlab("Sample Size") +
ylab("Posterior Mean of Xi") +
theme_bw() +
theme(strip.background = element_rect(fill = "white")) +
geom_hline(yintercept = 0, lty = 2)
# looking at pm alpha
hist1 <- ggplot(null_g, mapping = aes(x = pm_alpha, color = as.factor(alpha))) +
geom_histogram(bins = 100) +
facet_grid(rows = vars(n), cols = vars(alpha)) +
theme_bw()
ggplotly(hist1)
# looking at pm xi
hist2 <- ggplot(null_g, mapping = aes(x = pm_xi, color = as.factor(xi))) +
geom_histogram(bins = 100) +
facet_grid(rows = vars(n), cols = vars(xi)) +
theme_bw()
ggplotly(hist2)